Beer Markets

beer
code
data-analysis
Author

Jake Starkey

Published

November 2, 2023

Question 1 🍺

For Question 1, run the following R command to read the beer market data.

library(tidyverse)
library(skimr)
beer_mkts <- read.csv('https://bcdanl.github.io/data/beer_markets.csv')
rmarkdown::paged_table(beer_mkts)

-Each observation in beer_mkts is a household-level transaction record for a purchase of beer.

Q1a

-Find the top 5 markets in terms of the total beer_floz. -Find the top 5 markets in terms of the total beer_floz of BUD LIGHT. -Find the top 5 markets in terms of the total beer_floz of BUSCH LIGHT. -Find the top 5 markets in terms of the total beer_floz of COORS LIGHT. -Find the top 5 markets in terms of the total beer_floz of MILLER LITE. -Find the top 5 markets in terms of the total beer_floz of NATURAL LIGHT.

Q1a1 <- beer_mkts %>% 
   group_by(market) %>% 
   summarize(beer_floz_tot = sum(beer_floz, na.rm = T)) %>% 
   arrange(-beer_floz_tot) %>% 
   slice(1:5)

Q1a_bud <- beer_mkts %>% 
  filter(brand == "BUD LIGHT") %>% 
  group_by(market) %>% 
  summarize(beer_floz_tot = sum(beer_floz, na.rm = T)) %>% 
  arrange(-beer_floz_tot) %>% 
  slice(1:5)

Q1a_busch <- beer_mkts %>% 
  filter(brand == "BUSCH LIGHT") %>% 
  group_by(market) %>% 
  summarize(beer_floz_tot = sum(beer_floz, na.rm = T)) %>% 
  arrange(-beer_floz_tot) %>% 
  slice(1:5)

Q1a_coors <- beer_mkts %>% 
  filter(brand == "COORS LIGHT") %>% 
  group_by(market) %>% 
  summarize(beer_floz_tot = sum(beer_floz, na.rm = T)) %>% 
  arrange(-beer_floz_tot) %>% 
  slice(1:5)

Q1a_miller <- beer_mkts %>% 
  filter(brand == "MILLER LITE") %>% 
  group_by(market) %>% 
  summarize(beer_floz_tot = sum(beer_floz, na.rm = T)) %>% 
  arrange(-beer_floz_tot) %>% 
  slice(1:5)

Q1a_natural <- beer_mkts %>% 
  filter(brand == "NATURAL LIGHT") %>% 
  group_by(market) %>% 
  summarize(beer_floz_tot = sum(beer_floz, na.rm = T)) %>% 
  arrange(-beer_floz_tot) %>% 
  slice(1:5)

Q1b🍻

-For households that purchased BUD LIGHT at least once, what fraction of households did purchase only BUD LIGHT?

-For households that purchased BUSCH LIGHT at least once, what fraction of households did purchase only BUSCH LIGHT?

-For households that purchased COORS LIGHT at least once, what fraction of households did purchase only COORS LIGHT?

-For households that purchased MILLER LITE at least once, what fraction of households did purchase only MILLER LITE?

-For households that purchased NATURAL LIGHT at least once, what fraction of households did purchase only NATURAL LIGHT?

-Which beer brand does have the largest proportion of such loyal consumers?

q1b <- beer_mkts %>% 
  mutate(bud = ifelse(brand=="BUD LIGHT", 1, 0), # 1 if brand=="BUD LIGHT"; 0 otherwise
         busch = ifelse(brand=="BUSCH LIGHT", 1, 0),
         coors = ifelse(brand=="COORS LIGHT", 1, 0),
         miller = ifelse(brand=="MILLER LITE", 1, 0),
         natural = ifelse(brand=="NATURAL LIGHT", 1, 0),
         .after = hh) %>% 
  select(hh:natural) %>%  # select the variables we need
  group_by(hh) %>% 
  summarise(n_transactions = n(), # number of beer transactions for each hh
            n_bud = sum(bud), # number of BUD LIGHT transactions for each hh
            n_busch = sum(busch), 
            n_coors = sum(coors), 
            n_miller = sum(miller), 
            n_natural = sum(natural) 
  ) %>% 
  summarise(loyal_bud = sum(n_transactions == n_bud) / sum(n_bud > 0), 
              # sum(n_transactions == n_bud) : the number of households that purchased BUD LIGHT only
              # sum(n_bud > 0) : the number of households that purchased BUD LIGHT at least once.
            loyal_busch = sum(n_transactions == n_busch) / sum(n_busch > 0),
            loyal_coors = sum(n_transactions == n_coors) / sum(n_coors > 0),
            loyal_miller = sum(n_transactions == n_miller) / sum(n_miller > 0),
            loyal_natural = sum(n_transactions == n_natural) / sum(n_natural > 0)
  )

q1b
# A tibble: 1 × 5
  loyal_bud loyal_busch loyal_coors loyal_miller loyal_natural
      <dbl>       <dbl>       <dbl>        <dbl>         <dbl>
1     0.660       0.473       0.639        0.631         0.510

Q1c 🍻

-For each household, calculate the number of beer transactions. -For each household, calculate the proportion of each beer brand choice.

q1c <- beer_mkts %>% 
  count(hh, brand) %>% 
  group_by(hh) %>% 
  mutate(n_tot = sum(n)) %>%  
  arrange(hh, brand) %>% 
  mutate( prop = n / n_tot ) 

q1c
# A tibble: 13,202 × 5
# Groups:   hh [10,408]
        hh brand           n n_tot  prop
     <int> <chr>       <int> <int> <dbl>
 1 2000235 BUD LIGHT       1     1 1    
 2 2000417 COORS LIGHT     8     8 1    
 3 2000711 COORS LIGHT    13    13 1    
 4 2000946 BUD LIGHT       1     2 0.5  
 5 2000946 MILLER LITE     1     2 0.5  
 6 2001521 BUD LIGHT       6    11 0.545
 7 2001521 COORS LIGHT     3    11 0.273
 8 2001521 MILLER LITE     2    11 0.182
 9 2001531 BUSCH LIGHT     1     1 1    
10 2001581 BUSCH LIGHT     5     5 1    
# ℹ 13,192 more rows